{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "510fd5b8-7ddc-4fbf-9b2f-cda3154b10fd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T03:39:34.282902Z",
     "iopub.status.busy": "2022-05-04T03:39:34.282601Z",
     "iopub.status.idle": "2022-05-04T03:39:35.274722Z",
     "shell.execute_reply": "2022-05-04T03:39:35.274123Z",
     "shell.execute_reply.started": "2022-05-04T03:39:34.282829Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a136500-476a-467e-a976-6168de8441bb",
   "metadata": {},
   "source": [
    "## 小结\n",
    "1. apply和applymap\n",
    "    1. 直接对Pandas对象使用NumPy的函数\n",
    "    2. apply按行/列执行函数，applymap对所有与元素执行函数\n",
    "2. 排序\n",
    "    * 按索引排序 & 按值排序\n",
    "3. 唯一值和成员属性\n",
    "    * Series才有唯一值操作，DataFrame只能查确认值是否存在\n",
    "4. 处理缺失值\n",
    "    1. 先判断，再处理\n",
    "    2. 丢弃缺失数据 & 填充值替换"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0d2b404-3cd4-49e7-accf-8471fd2bb91e",
   "metadata": {},
   "source": [
    "## 一、apply和applymap"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "736af490-2ebc-4957-a39c-58722a07a254",
   "metadata": {},
   "source": [
    "### 1.1 直接使用NumPy函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e41c9806-96fd-49e6-a1b3-cd39305634e6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T03:41:59.216075Z",
     "iopub.status.busy": "2022-05-04T03:41:59.215820Z",
     "iopub.status.idle": "2022-05-04T03:41:59.228233Z",
     "shell.execute_reply": "2022-05-04T03:41:59.227061Z",
     "shell.execute_reply.started": "2022-05-04T03:41:59.216047Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -1.082007  0.902550 -0.962268 -0.558446\n",
      "1  0.641768  0.499698  0.035714  0.841339\n",
      "2 -0.595542 -0.044457 -2.549478  0.025623\n",
      "3 -1.185363 -1.405229 -0.076391 -1.262991\n",
      "4 -1.438575 -0.502771  0.853552 -1.015997\n"
     ]
    }
   ],
   "source": [
    "# 创建DataFrame对象\n",
    "df = pd.DataFrame(np.random.randn(5, 4))\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a5119e1a-ddc1-41bc-a8fe-c17a34230d96",
   "metadata": {
    "collapsed": true,
    "execution": {
     "iopub.execute_input": "2022-05-04T04:19:18.851873Z",
     "iopub.status.busy": "2022-05-04T04:19:18.851619Z",
     "iopub.status.idle": "2022-05-04T04:19:18.859124Z",
     "shell.execute_reply": "2022-05-04T04:19:18.858452Z",
     "shell.execute_reply.started": "2022-05-04T04:19:18.851844Z"
    },
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0  1.082007  0.902550  0.962268  0.558446\n",
      "1  0.641768  0.499698  0.035714  0.841339\n",
      "2  0.595542  0.044457  2.549478  0.025623\n",
      "3  1.185363  1.405229  0.076391  1.262991\n",
      "4  1.438575  0.502771  0.853552  1.015997\n"
     ]
    }
   ],
   "source": [
    "# 使用NumPy函数对DataFrame求绝对值\n",
    "df = np.abs(df)\n",
    "print(np.abs(df))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0e186f7e-bd3d-487f-a5ff-364decca013e",
   "metadata": {},
   "source": [
    "### 1.2 apply函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "67e95934-89bf-45f2-b1f3-2622965afeac",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T04:19:26.804856Z",
     "iopub.status.busy": "2022-05-04T04:19:26.804542Z",
     "iopub.status.idle": "2022-05-04T04:19:26.812567Z",
     "shell.execute_reply": "2022-05-04T04:19:26.811847Z",
     "shell.execute_reply.started": "2022-05-04T04:19:26.804828Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.438575\n",
       "1    1.405229\n",
       "2    2.549478\n",
       "3    1.262991\n",
       "dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 通过apply将函数应用到列或者行，默认按列\n",
    "df.apply(lambda x: x.max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "aa64ff9a-31d9-4f66-ae46-ab446db26fdf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T06:48:17.681232Z",
     "iopub.status.busy": "2022-05-04T06:48:17.680824Z",
     "iopub.status.idle": "2022-05-04T06:48:17.690129Z",
     "shell.execute_reply": "2022-05-04T06:48:17.689158Z",
     "shell.execute_reply.started": "2022-05-04T06:48:17.681110Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.082007\n",
       "1    0.841339\n",
       "2    2.549478\n",
       "3    1.405229\n",
       "4    1.438575\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 通过apply将函数应用到列或者行，指定按行\n",
    "df.apply(lambda x: x.max(), axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "52e14061-f419-4f8c-8d7e-bb3a91aaf574",
   "metadata": {},
   "source": [
    "### 1.3 applymap函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4756617c-ffbc-4842-b3a0-fe7e94f153bd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T07:05:49.589202Z",
     "iopub.status.busy": "2022-05-04T07:05:49.588932Z",
     "iopub.status.idle": "2022-05-04T07:05:49.625384Z",
     "shell.execute_reply": "2022-05-04T07:05:49.595555Z",
     "shell.execute_reply.started": "2022-05-04T07:05:49.589174Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      0     1     2     3\n",
      "0  1.08  0.90  0.96  0.56\n",
      "1  0.64  0.50  0.04  0.84\n",
      "2  0.60  0.04  2.55  0.03\n",
      "3  1.19  1.41  0.08  1.26\n",
      "4  1.44  0.50  0.85  1.02\n"
     ]
    }
   ],
   "source": [
    "# 使用applymap函数对每个元素进行格式设置\n",
    "print(df.applymap(lambda x: \"%.2f\" % x))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a29fcb1a-cc52-4d2f-b39b-cd1681538df7",
   "metadata": {},
   "source": [
    "## 二、排序"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "002a7544-7f11-4e96-87d0-f592280ae8f0",
   "metadata": {},
   "source": [
    "### 2.1 按索引排序"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1918810f-ef55-4381-b3aa-20591f47dc8c",
   "metadata": {},
   "source": [
    "#### 2.1.1 Series按索引排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9bf4eaef-da7f-45a5-875b-caa002497698",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T07:39:06.978639Z",
     "iopub.status.busy": "2022-05-04T07:39:06.978390Z",
     "iopub.status.idle": "2022-05-04T07:39:06.985534Z",
     "shell.execute_reply": "2022-05-04T07:39:06.984664Z",
     "shell.execute_reply.started": "2022-05-04T07:39:06.978613Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    0\n",
      "b    1\n",
      "c    2\n",
      "a    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 创建Series对象\n",
    "s1 = pd.Series(np.arange(4), index=[\"d\", \"b\", \"c\", \"a\"])\n",
    "print(s1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "4b6ed534-154e-4518-94f5-63e572875373",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T07:44:19.972510Z",
     "iopub.status.busy": "2022-05-04T07:44:19.972214Z",
     "iopub.status.idle": "2022-05-04T07:44:19.981155Z",
     "shell.execute_reply": "2022-05-04T07:44:19.980479Z",
     "shell.execute_reply.started": "2022-05-04T07:44:19.972485Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    3\n",
       "b    1\n",
       "c    2\n",
       "d    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# s1按索引排序，升序\n",
    "s1.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9f134daf-4dde-4b7f-952b-e68296ad092e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T07:53:10.467369Z",
     "iopub.status.busy": "2022-05-04T07:53:10.466900Z",
     "iopub.status.idle": "2022-05-04T07:53:10.473604Z",
     "shell.execute_reply": "2022-05-04T07:53:10.472955Z",
     "shell.execute_reply.started": "2022-05-04T07:53:10.467343Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    0\n",
       "c    2\n",
       "b    1\n",
       "a    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# s1按索引排序，降序\n",
    "s1.sort_index(ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92bd5a7e-6409-4f1c-a1e1-64444b5d671e",
   "metadata": {},
   "source": [
    "#### 2.1.2 DataFrame按索引排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "2edee851-f053-438b-9f46-358a4f40e9f1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:20:59.311245Z",
     "iopub.status.busy": "2022-05-04T09:20:59.310994Z",
     "iopub.status.idle": "2022-05-04T09:20:59.317395Z",
     "shell.execute_reply": "2022-05-04T09:20:59.316791Z",
     "shell.execute_reply.started": "2022-05-04T09:20:59.311219Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "b  3  1  0\n",
      "d  7 -1  6\n",
      "c  9  4 -3\n",
      "a  0  8  2\n"
     ]
    }
   ],
   "source": [
    "# 创建DataFrame对象\n",
    "df1 = pd.DataFrame(\n",
    "    {\n",
    "        \"B\": {\"b\": 3, \"d\": 7, \"c\": 9, \"a\": 0},\n",
    "        \"C\": {\"b\": 1, \"d\": -1, \"c\": 4, \"a\": 8},\n",
    "        \"A\": {\"b\": 0, \"d\": 6, \"c\": -3, \"a\": 2},\n",
    "    }\n",
    ")\n",
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "40634912-d473-4830-ab3d-ef90fc7c3199",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:24:43.459142Z",
     "iopub.status.busy": "2022-05-04T09:24:43.458888Z",
     "iopub.status.idle": "2022-05-04T09:24:43.465471Z",
     "shell.execute_reply": "2022-05-04T09:24:43.464721Z",
     "shell.execute_reply.started": "2022-05-04T09:24:43.459115Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "a  0  8  2\n",
      "b  3  1  0\n",
      "c  9  4 -3\n",
      "d  7 -1  6\n"
     ]
    }
   ],
   "source": [
    "# 按照行排序\n",
    "print(df1.sort_index())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "45cee26c-cac4-4658-8cc1-acec20a90103",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:24:52.911883Z",
     "iopub.status.busy": "2022-05-04T09:24:52.911621Z",
     "iopub.status.idle": "2022-05-04T09:24:52.919055Z",
     "shell.execute_reply": "2022-05-04T09:24:52.918279Z",
     "shell.execute_reply.started": "2022-05-04T09:24:52.911855Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A  B  C\n",
      "b  0  3  1\n",
      "d  6  7 -1\n",
      "c -3  9  4\n",
      "a  2  0  8\n"
     ]
    }
   ],
   "source": [
    "# 按照列排序\n",
    "print(df1.sort_index(axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "272694b1-bedc-4857-83da-cbacadf015ea",
   "metadata": {},
   "source": [
    "### 2.2 按值排序"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40d8cdbe-e681-4efd-8c4b-3aaa221adc34",
   "metadata": {},
   "source": [
    "#### 2.2.1 Series按值排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "baee0b76-b9ac-483e-970d-32b40eba62b9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:25:53.619013Z",
     "iopub.status.busy": "2022-05-04T08:25:53.618753Z",
     "iopub.status.idle": "2022-05-04T08:25:53.623466Z",
     "shell.execute_reply": "2022-05-04T08:25:53.622858Z",
     "shell.execute_reply.started": "2022-05-04T08:25:53.618986Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    0\n",
      "b    1\n",
      "c    2\n",
      "a    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 查看s1\n",
    "print(s1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a318e1b6-e463-4afb-9def-bd7ce7de4e8c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:30:51.857456Z",
     "iopub.status.busy": "2022-05-04T08:30:51.857203Z",
     "iopub.status.idle": "2022-05-04T08:30:51.862715Z",
     "shell.execute_reply": "2022-05-04T08:30:51.862092Z",
     "shell.execute_reply.started": "2022-05-04T08:30:51.857430Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    3\n",
      "c    2\n",
      "b    1\n",
      "d    0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# s1按值进行排序，降序\n",
    "print(s1.sort_values(ascending=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "d6997c84-342e-459c-87fa-b324ef54ae8e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:39:22.899147Z",
     "iopub.status.busy": "2022-05-04T08:39:22.898883Z",
     "iopub.status.idle": "2022-05-04T08:39:22.904911Z",
     "shell.execute_reply": "2022-05-04T08:39:22.903991Z",
     "shell.execute_reply.started": "2022-05-04T08:39:22.899120Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    0.0\n",
      "b    1.0\n",
      "c    2.0\n",
      "a    NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# 改造s1,把a索引的值改成nan\n",
    "s1[\"a\"] = np.nan\n",
    "print(s1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "89e1c0a2-545a-42de-afcf-c148f6e07ad4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:42:04.263319Z",
     "iopub.status.busy": "2022-05-04T08:42:04.263032Z",
     "iopub.status.idle": "2022-05-04T08:42:04.268613Z",
     "shell.execute_reply": "2022-05-04T08:42:04.267870Z",
     "shell.execute_reply.started": "2022-05-04T08:42:04.263283Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c    2.0\n",
      "b    1.0\n",
      "d    0.0\n",
      "a    NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# s1按值进行排序，降序\n",
    "print(s1.sort_values(ascending=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "579c2683-7b3c-43e5-9955-7f60b63e64f5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:46:23.379736Z",
     "iopub.status.busy": "2022-05-04T08:46:23.379481Z",
     "iopub.status.idle": "2022-05-04T08:46:23.385046Z",
     "shell.execute_reply": "2022-05-04T08:46:23.384021Z",
     "shell.execute_reply.started": "2022-05-04T08:46:23.379710Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    0.0\n",
      "b    1.0\n",
      "c    2.0\n",
      "a    NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# s1按值进行排序，默认升序\n",
    "print(s1.sort_values())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c841868e-7f46-4b21-afff-cd9fa9f8c319",
   "metadata": {},
   "source": [
    "#### 2.2.2 DataFrame按值排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a691549d-e058-48dd-97de-bb86d6760ac0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:27:57.025541Z",
     "iopub.status.busy": "2022-05-04T09:27:57.025208Z",
     "iopub.status.idle": "2022-05-04T09:27:57.031286Z",
     "shell.execute_reply": "2022-05-04T09:27:57.030672Z",
     "shell.execute_reply.started": "2022-05-04T09:27:57.025516Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "b  3  1  0\n",
      "d  7 -1  6\n",
      "c  9  4 -3\n",
      "a  0  8  2\n"
     ]
    }
   ],
   "source": [
    "# 查看df1\n",
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "f2bd7260-e63b-4a4d-8cbb-4002dbb9cef6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T08:53:47.843408Z",
     "iopub.status.busy": "2022-05-04T08:53:47.843170Z",
     "iopub.status.idle": "2022-05-04T08:53:48.298748Z",
     "shell.execute_reply": "2022-05-04T08:53:48.297681Z",
     "shell.execute_reply.started": "2022-05-04T08:53:47.843385Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "DataFrame.sort_values() missing 1 required positional argument: 'by'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Input \u001b[0;32mIn [25]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/opt/miniconda3/envs/jpt/lib/python3.10/site-packages/pandas/util/_decorators.py:311\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    305\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[1;32m    306\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m    307\u001b[0m         msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39marguments),\n\u001b[1;32m    308\u001b[0m         \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m    309\u001b[0m         stacklevel\u001b[38;5;241m=\u001b[39mstacklevel,\n\u001b[1;32m    310\u001b[0m     )\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[0;31mTypeError\u001b[0m: DataFrame.sort_values() missing 1 required positional argument: 'by'"
     ]
    }
   ],
   "source": [
    "# 运行df1按值排序\n",
    "df1.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "7a95f4d0-b2a0-4ff6-8a47-d3839c1f32e0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:28:37.040111Z",
     "iopub.status.busy": "2022-05-04T09:28:37.039826Z",
     "iopub.status.idle": "2022-05-04T09:28:37.046770Z",
     "shell.execute_reply": "2022-05-04T09:28:37.045869Z",
     "shell.execute_reply.started": "2022-05-04T09:28:37.040087Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "c  9  4 -3\n",
      "b  3  1  0\n",
      "a  0  8  2\n",
      "d  7 -1  6\n"
     ]
    }
   ],
   "source": [
    "# 运行df1按值排序，指定按A列的值排序\n",
    "print(df1.sort_values(by=\"A\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "577289eb-877e-4aec-8d15-3ff4f0572647",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:37:21.113446Z",
     "iopub.status.busy": "2022-05-04T09:37:21.113157Z",
     "iopub.status.idle": "2022-05-04T09:37:21.121804Z",
     "shell.execute_reply": "2022-05-04T09:37:21.120910Z",
     "shell.execute_reply.started": "2022-05-04T09:37:21.113419Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "a  0  8  2\n",
      "b  3  1  0\n",
      "d  7 -1  6\n",
      "c  9  4 -3\n"
     ]
    }
   ],
   "source": [
    "# 运行df1按值排序，指定按A和C列的值排序\n",
    "print(df1.sort_values(by=[\"B\", \"C\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "80e37318-7ef6-42d8-b946-e92aff4a5bdb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:30:10.549447Z",
     "iopub.status.busy": "2022-05-04T09:30:10.549145Z",
     "iopub.status.idle": "2022-05-04T09:30:10.556470Z",
     "shell.execute_reply": "2022-05-04T09:30:10.555364Z",
     "shell.execute_reply.started": "2022-05-04T09:30:10.549422Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  A  C\n",
      "b  3  0  1\n",
      "d  7  6 -1\n",
      "c  9 -3  4\n",
      "a  0  2  8\n"
     ]
    }
   ],
   "source": [
    "# 运行df1按值排序，指定按a行的值排序\n",
    "print(df1.sort_values(by=\"a\", axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "73ba9ce1-4a5f-48c4-9dfd-58563ccdbff9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T09:45:54.100322Z",
     "iopub.status.busy": "2022-05-04T09:45:54.099999Z",
     "iopub.status.idle": "2022-05-04T09:45:54.108995Z",
     "shell.execute_reply": "2022-05-04T09:45:54.108279Z",
     "shell.execute_reply.started": "2022-05-04T09:45:54.100294Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A  C  B\n",
      "b  0  1  3\n",
      "d  6 -1  7\n",
      "c -3  4  9\n",
      "a  2  8  0\n"
     ]
    }
   ],
   "source": [
    "# 运行df1按值排序，指定按a行的值排序\n",
    "print(df1.sort_values(by=[\"b\", \"d\"], axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a630da3-73bd-44b1-a57f-605be43232a8",
   "metadata": {},
   "source": [
    "## 三、唯一值和成员属性"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6484e6d4-8064-4c26-99f4-861b78c0db3b",
   "metadata": {},
   "source": [
    "### 3.1 Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "91530c7b-2a81-4b54-91e8-94145c62e3af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:11:59.117115Z",
     "iopub.status.busy": "2022-05-04T18:11:59.116853Z",
     "iopub.status.idle": "2022-05-04T18:11:59.121746Z",
     "shell.execute_reply": "2022-05-04T18:11:59.121107Z",
     "shell.execute_reply.started": "2022-05-04T18:11:59.117091Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    2\n",
      "1    3\n",
      "2    5\n",
      "3    6\n",
      "4    7\n",
      "5    8\n",
      "6    3\n",
      "7    6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 创建Series对象s2\n",
    "s2 = pd.Series([2, 3, 5, 6, 7, 8, 3, 6])\n",
    "print(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "fe6bb5bb-f5d7-4c9a-b5c1-c13d440e85a1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:21:34.553381Z",
     "iopub.status.busy": "2022-05-04T18:21:34.553129Z",
     "iopub.status.idle": "2022-05-04T18:21:34.559430Z",
     "shell.execute_reply": "2022-05-04T18:21:34.558652Z",
     "shell.execute_reply.started": "2022-05-04T18:21:34.553354Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 3, 5, 6, 7, 8])"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 获取s2中的唯一值\n",
    "s2Unique = s2.unique()\n",
    "s2Unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "edeb514c-b7e7-4ba1-bd88-f7470e07ce21",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:31:33.578781Z",
     "iopub.status.busy": "2022-05-04T18:31:33.578539Z",
     "iopub.status.idle": "2022-05-04T18:31:33.588041Z",
     "shell.execute_reply": "2022-05-04T18:31:33.586991Z",
     "shell.execute_reply.started": "2022-05-04T18:31:33.578758Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    2\n",
      "a    3\n",
      "a    5\n",
      "b    6\n",
      "b    7\n",
      "b    8\n",
      "b    3\n",
      "b    6\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 创建Series对象s2\n",
    "s2 = pd.Series([2, 3, 5, 6, 7, 8, 3, 6], index=[\"a\", \"a\", \"a\", \"b\", \"b\", \"b\", \"b\", \"b\"])\n",
    "print(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "2f322732-c87a-4e2e-9f0d-bc273811e804",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:37:40.835651Z",
     "iopub.status.busy": "2022-05-04T18:37:40.835404Z",
     "iopub.status.idle": "2022-05-04T18:37:40.840720Z",
     "shell.execute_reply": "2022-05-04T18:37:40.840005Z",
     "shell.execute_reply.started": "2022-05-04T18:37:40.835624Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['a', 'b'], dtype='object')"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 获取s2中的索引标签唯一值\n",
    "s2.index.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "c264c8e2-1cb4-4484-8eb2-e5ccf1225f0a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:43:17.853304Z",
     "iopub.status.busy": "2022-05-04T18:43:17.853047Z",
     "iopub.status.idle": "2022-05-04T18:43:17.858436Z",
     "shell.execute_reply": "2022-05-04T18:43:17.857460Z",
     "shell.execute_reply.started": "2022-05-04T18:43:17.853275Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 判断s2的标签索引是不是唯一的\n",
    "s2.index.is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "c253f0a2-7f85-4d5c-afaf-f9803db9bd14",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T18:52:42.188197Z",
     "iopub.status.busy": "2022-05-04T18:52:42.187949Z",
     "iopub.status.idle": "2022-05-04T18:52:42.195379Z",
     "shell.execute_reply": "2022-05-04T18:52:42.194705Z",
     "shell.execute_reply.started": "2022-05-04T18:52:42.188169Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    2\n",
       "6    2\n",
       "2    1\n",
       "5    1\n",
       "7    1\n",
       "8    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计每个值出现的次数\n",
    "s2.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "629af777-b881-4f83-8325-227313d72974",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T19:03:50.219470Z",
     "iopub.status.busy": "2022-05-04T19:03:50.219093Z",
     "iopub.status.idle": "2022-05-04T19:03:50.225874Z",
     "shell.execute_reply": "2022-05-04T19:03:50.224956Z",
     "shell.execute_reply.started": "2022-05-04T19:03:50.219424Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    False\n",
       "a    False\n",
       "a    False\n",
       "b     True\n",
       "b    False\n",
       "b    False\n",
       "b    False\n",
       "b     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 判断某个数值是否有在当前的对象里出现\n",
    "s2.isin([6])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "e57199c3-a750-46aa-ba91-208069ec20bc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T19:10:01.731228Z",
     "iopub.status.busy": "2022-05-04T19:10:01.730853Z",
     "iopub.status.idle": "2022-05-04T19:10:01.737246Z",
     "shell.execute_reply": "2022-05-04T19:10:01.736633Z",
     "shell.execute_reply.started": "2022-05-04T19:10:01.731198Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    False\n",
       "a    False\n",
       "a    False\n",
       "b     True\n",
       "b    False\n",
       "b     True\n",
       "b    False\n",
       "b     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 判断某些数值是否有在当前的对象里出现\n",
    "s2.isin([6, 8])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "33ad4083-87ed-46af-b41e-b5be600eba19",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T19:15:48.566444Z",
     "iopub.status.busy": "2022-05-04T19:15:48.566186Z",
     "iopub.status.idle": "2022-05-04T19:15:48.569807Z",
     "shell.execute_reply": "2022-05-04T19:15:48.568834Z",
     "shell.execute_reply.started": "2022-05-04T19:15:48.566418Z"
    },
    "tags": []
   },
   "source": [
    "### 3.2 DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "b4a39477-3c9e-4fa5-a532-a75774532e06",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T19:18:15.854245Z",
     "iopub.status.busy": "2022-05-04T19:18:15.853995Z",
     "iopub.status.idle": "2022-05-04T19:18:15.860111Z",
     "shell.execute_reply": "2022-05-04T19:18:15.859446Z",
     "shell.execute_reply.started": "2022-05-04T19:18:15.854218Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   B  C  A\n",
      "b  3  1  0\n",
      "d  7 -1  6\n",
      "c  9  4 -3\n",
      "a  0  8  2\n"
     ]
    }
   ],
   "source": [
    "# 查看df1\n",
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "6f0d0066-c012-434d-8876-4d218551fbf2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-04T19:20:08.388124Z",
     "iopub.status.busy": "2022-05-04T19:20:08.387873Z",
     "iopub.status.idle": "2022-05-04T19:20:08.394669Z",
     "shell.execute_reply": "2022-05-04T19:20:08.393675Z",
     "shell.execute_reply.started": "2022-05-04T19:20:08.388097Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       B      C      A\n",
      "b   True  False  False\n",
      "d  False  False  False\n",
      "c  False  False  False\n",
      "a  False  False   True\n"
     ]
    }
   ],
   "source": [
    "# 判断某些数值是否有在当前的对象里出现\n",
    "print(df1.isin([3, 2]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8f7bf7f1-eb33-4574-8eb8-018ef65f2394",
   "metadata": {},
   "source": [
    "## 四、处理缺失数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "464f803d-445d-4244-a81d-8031dca06f5d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T01:46:56.316961Z",
     "iopub.status.busy": "2022-05-05T01:46:56.316713Z",
     "iopub.status.idle": "2022-05-05T01:46:56.324426Z",
     "shell.execute_reply": "2022-05-05T01:46:56.323524Z",
     "shell.execute_reply.started": "2022-05-05T01:46:56.316935Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.551792  1.292545 -0.944819\n",
      "1  1.000000  3.000000       NaN\n",
      "2       NaN  6.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "source": [
    "# 创建含缺失值(NaN)的DataFrame；第一行由np.random.randn生成，未固定随机种子时每次运行的数值都会不同\n",
    "df2 = pd.DataFrame(\n",
    "    [np.random.randn(3), \n",
    "     [1.0, 3.0, np.nan],\n",
    "     [np.nan, 6.0, np.nan],\n",
    "     [1.0, 2.0, 3.0]]\n",
    ")\n",
    "print(df2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5c8caa48-8907-4612-bff9-9d891ba0354a",
   "metadata": {},
   "source": [
    "### 4.1 判断是否存在缺失值 - isnull"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "8b1fdc67-5869-4637-b45e-69bab1ec4f91",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T02:05:28.935379Z",
     "iopub.status.busy": "2022-05-05T02:05:28.935127Z",
     "iopub.status.idle": "2022-05-05T02:05:28.941646Z",
     "shell.execute_reply": "2022-05-05T02:05:28.940690Z",
     "shell.execute_reply.started": "2022-05-05T02:05:28.935351Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n"
     ]
    }
   ],
   "source": [
    "# 用isnull逐元素判断df2中的值是否缺失，缺失处返回True\n",
    "print(df2.isnull())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3bc5a29f-850a-4598-abf3-239eaf4041b8",
   "metadata": {},
   "source": [
    "### 4.2 删除缺失数据 - dropna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "1c86bc5e-3e49-4755-9d04-67d05e9f32de",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T02:16:45.282561Z",
     "iopub.status.busy": "2022-05-05T02:16:45.282313Z",
     "iopub.status.idle": "2022-05-05T02:16:45.289962Z",
     "shell.execute_reply": "2022-05-05T02:16:45.289320Z",
     "shell.execute_reply.started": "2022-05-05T02:16:45.282534Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.551792  1.292545 -0.944819\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "source": [
    "# 用dropna删除缺失值，默认(axis=0)丢弃所有含缺失值的行\n",
    "print(df2.dropna())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "266f55df-9f1a-4b62-9720-86e566e5500d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T02:24:52.214090Z",
     "iopub.status.busy": "2022-05-05T02:24:52.213845Z",
     "iopub.status.idle": "2022-05-05T02:24:52.220446Z",
     "shell.execute_reply": "2022-05-05T02:24:52.219782Z",
     "shell.execute_reply.started": "2022-05-05T02:24:52.214063Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1\n",
      "0  1.292545\n",
      "1  3.000000\n",
      "2  6.000000\n",
      "3  2.000000\n"
     ]
    }
   ],
   "source": [
    "# 用dropna删除缺失值，指定axis=1时丢弃所有含缺失值的列\n",
    "print(df2.dropna(axis=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31065979-cd7e-4dc5-b258-271d7cb9b52b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T02:28:03.294032Z",
     "iopub.status.busy": "2022-05-05T02:28:03.293330Z",
     "iopub.status.idle": "2022-05-05T02:28:03.297511Z",
     "shell.execute_reply": "2022-05-05T02:28:03.296723Z",
     "shell.execute_reply.started": "2022-05-05T02:28:03.294009Z"
    }
   },
   "source": [
    "### 4.3 填充缺失值 - fillna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "9b8274b9-9537-40c9-9690-b398ea687cc5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-05T02:37:01.780782Z",
     "iopub.status.busy": "2022-05-05T02:37:01.780571Z",
     "iopub.status.idle": "2022-05-05T02:37:01.785761Z",
     "shell.execute_reply": "2022-05-05T02:37:01.785122Z",
     "shell.execute_reply.started": "2022-05-05T02:37:01.780763Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.551792  1.292545 -0.944819\n",
      "1  1.000000  3.000000 -2.000000\n",
      "2 -2.000000  6.000000 -2.000000\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "source": [
    "# 用fillna填充缺失值\n",
    "print(df2.fillna(-2.))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
